



********************************nearstat china all flows vs afrobarometer (precision code 1, 2, 3)




* Root folders used throughout (machine-specific absolute paths)
global afrobar "C:\Users\xisaka\ShareFile\Personal Folders\Afrobarometer"
global aiddata "C:\Users\xisaka\ShareFile\Personal Folders\AidData"
global china_ethnic "C:\Users\xisaka\Dropbox\Ias\MINA PAPERS\chinese aid and ethnic sentiments\stata"
global temp "C:\Users\xisaka\ShareFile\Personal Folders\temp data"

* Session settings
set matsize 800
set more off



********** Open the collapsed Afrobarometer file (precision codes 1, 2, 3)
* NOTE(review): presumably one row per unique g_lat/g_lon pair, since it is
* later merged 1:m on those two keys -- confirm against the file that builds it.

use "$afrobar/Afro_collapsed_precisioncode123_replication.dta", clear




** Add the aid projects: append the project rows, keeping only their ID and coordinates
append using  "$aiddata\all chinese projects to afrobar countries.dta", keep(project_location_id  latitude longitude)		

** Sort so that nearstat will run, putting all projects first.
** Project rows carry a latitude; missing values sort last, so rows without a
** latitude end up after the projects.
** (Sorting on project_location_id was tried first, but it is not numeric, so it
** did not put the projects first.)
*gsort project_location_id
gsort latitude


** For every distance band, count the projects within that band of each cluster.
** The 200 km count later decides how many nearstat passes are needed.
set more off
local bands 10 25 50 75 100 200
foreach km of local bands {
	nearstat g_lat g_lon, near(latitude longitude) ///
		ncount(numberwithin`km') dband(0 `km') ///
		distvar(_1projectdist`km') favor(speed)
	* only the count is kept; the distance variable is discarded straight away
	drop _1projectdist`km'
}

** Link every cluster to its 1st, 2nd, ... k-th nearest project, with k running
** up to the largest number of projects found within 200 km of any cluster
summarize numberwithin200
local maxnear = r(max)
forvalues k = 1/`maxnear' {
	nearstat g_lat g_lon if numberwithin200>=`k', kth(`k') favor(speed) ///
		near(latitude longitude) distvar(_`k'projectdist) ///
		nid(project_location_id _`k'closestproject)
}

set more off


** Clusters with no project within 200 km still get their single closest project
nearstat g_lat g_lon if numberwithin200==0, kth(1) favor(speed) ///
	near(latitude longitude) distvar(not200_projectdist) ///
	nid(project_location_id not200_closestproject)
	
* Temporary save if something goes wrong.
* The checkpoint is written before the two replace statements below, so the
* saved file does not yet contain the fallback fill-in.
save "$temp/temp_aid.dta", replace

* For clusters with no project within 200 km, use the fallback nearest project
* as their first (closest) project and distance
replace _1closestproject=not200_closestproject if numberwithin200==0
replace _1projectdist=not200_projectdist if numberwithin200==0

* Now drop all project rows (they are now linked to clusters rowwise):
* first the rows that carry a latitude, then the project-only columns,
* then any remaining row without cluster coordinates
drop if !missing(latitude)
drop project_location_id latitude longitude
drop if missing(g_lat)

* compress attempts to reduce the amount of memory used by the data
compress


** Attach project details (project id, start year, end year) to each of the
** k linked projects, one merge per k
set more off
summarize numberwithin200
local maxnear = r(max)
forvalues k = 1/`maxnear' {
	* the merge key must carry the name used in the project file
	rename _`k'closestproject project_location_id
	merge m:1 project_location_id using "$aiddata\all chinese projects to afrobar countries.dta", ///
		keepusing(project_id startyear endyear) keep(master match) nogenerate
	* give the merged columns their k-specific names and restore the key name
	rename project_id _`k'project_id
	rename startyear _`k'startyear
	rename endyear _`k'endyear
	rename project_location_id _`k'closestproject
}

compress


** Bring the pre-collapse data back in on the coordinates, so that all rows
** sharing the same coordinates get exactly the same linked projects.
** Rows found only in the using file are not kept.
set more off
merge 1:m g_lat g_lon using "$afrobar/Afro_tot_precisioncode123_replication.dta", ///
	keep(master match) nogenerate

save "$temp/temp_aidC.dta", replace





use "$temp/temp_aidC.dta", clear

* Convert the start and end year variables to numeric
destring *startyear *endyear, replace


** Classify every respondent, per distance band X, by the status of the linked projects:
**   active    = some project within X km started in or before the interview year
**   suspended = some project within X km ended before the interview year and
**               no project within X km is running in the interview year
**   inactive  = some project within X km starts after the interview year and the
**               respondent is neither active nor suspended (area "contaminated" otherwise)
** Stata treats a missing value as larger than any number. The inactive rule
** therefore checks explicitly that the start year is known; otherwise a project
** with an unknown start year would count as a future project.
** A missing end year still counts as "not yet ended" in the help variable (unchanged).
** NOTE(review): a missing int_year still satisfies startyear<=int_year, so such
** respondents are coded active -- confirm int_year is never missing.
set more off
foreach dist in 10 25 50 75 100 200 {
	gen byte active`dist' = .
	gen byte suspended`dist' = .
	gen byte inactive`dist' = .
	gen byte activecodehelp`dist' = .
	* Labels do not depend on the project index, so they are set once per band
	label variable active`dist' "Active if any project is active"
	label variable suspended`dist' "Suspended if all projects have ended"
	label variable inactive`dist' "Inactive if a project will start, conditional on no projects before"
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Help variable for suspended: project i has started and has not ended by the interview year
		replace activecodehelp`dist' = 1 if _`i'startyear<=int_year & _`i'projectdist <= `dist' & _`i'endyear>int_year
		* Resp active if any project within X km started in or before the interview year
		replace active`dist' = 1 if _`i'startyear<=int_year & _`i'projectdist <= `dist'
		* Resp suspended if any project within X km ended before the interview year
		replace suspended`dist' = 1 if _`i'endyear<int_year & _`i'endyear<.  & _`i'projectdist <= `dist'
		* But not if at least one project is still running within X km
		replace suspended`dist' = 0 if activecodehelp`dist'==1
		* Resp inactive if any project within X km has a known start year after the interview year
		replace inactive`dist' = 1 if _`i'startyear>int_year & !missing(_`i'startyear) & _`i'projectdist <= `dist'
		* But not if at least one project is either active or suspended within X km
		replace inactive`dist' = 0 if active`dist'==1 | suspended`dist'==1
	}
	* Resp not active/inactive/suspended if never replaced in the loop above
	replace active`dist'=0 if missing(active`dist')
	replace inactive`dist'=0 if missing(inactive`dist')
	replace suspended`dist'=0 if missing(suspended`dist')
	noisily di as text "Calculated distance " as result `dist'
}


compress
save "$temp/temp_aid2C.dta", replace		//large file




* Fixed effects
set more off
** Generate variables denoting the first start year and the last end year of
** the projects within X km of the respondent.
** nextyear is created here but not filled in anywhere in this section.
foreach dist in 10 25 50 75 100 200 {
	gen firstyear`dist'=.
	gen nextyear`dist'=.
	gen lastyear`dist'=0
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Firstyear is replaced if the i-th project lies within X km and starts
		* earlier than the value found so far. The initial missing value counts
		* as larger than any year, so the first known start year always replaces it.
		replace firstyear`dist' = _`i'startyear if _`i'startyear<firstyear`dist' & _`i'projectdist<=`dist'
		label variable firstyear`dist' "Year of first project in area"
		* Lastyear is replaced if the end year of the i-th project is after the previous value.
		* Note: will equal 9999 if there is still a project there in 2013/2014.
		* NOTE(review): a missing end year also compares as larger, in which case
		* lastyear becomes missing -- confirm ongoing projects are coded 9999 in the project file.
		replace lastyear`dist' = _`i'endyear if _`i'endyear>lastyear`dist' & _`i'projectdist<=`dist'
		label variable lastyear`dist' "Year that last project closed in area"
	}
}


** Create variable "firstactive",
** defined as the ID of the first project to start within X km of the respondent.
** This identifies the first project for both active and inactive respondents!
** If several projects within X km share that first start year, the one handled
** last in the loop (highest i) is the one that is kept.

set more off

foreach dist in 10 25 50 75 100 200 {
	gen firstactive`dist'_1 = ""
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Replace firstactive if the i-th closest project is within X km and
		* this project starts in the same year as firstyear
		replace firstactive`dist'_1 = _`i'closestproject if _`i'projectdist<=`dist' & ///
			firstyear`dist'==_`i'startyear 
		label variable firstactive`dist'_1 "Name of first project in area"
		}
}




** Project identifiers for use as fixed effects
* String copies of the first-project ID for the narrower bands
foreach km in 10 25 50 75 {
	generate projfixed`km' = firstactive`km'_1
}

* Numeric code for the nearest project overall
encode _1closestproject, generate(first_proj)

* Swap the string IDs for numeric codes (the 10 km version stays a string)
foreach km in 50 75 25 {
	rename projfixed`km' projfixed`km'_org
	encode projfixed`km'_org, generate(projfixed`km')
}

codebook projfixed50*
drop projfixed50_org projfixed25_org projfixed75_org
	



**************************CREATING ADDITIONAL VARIABLES************************
* Indicator sets for interview year (yd*) and country (cd*)
tabulate int_year, generate(yd)
tabulate country, generate(cd)

* Distance to the nearest project
generate distance = _1projectdist

* Indicator sets for region (rd*) and survey wave (wd*)
tabulate region, generate(rd)
tabulate wave, generate(wd)

* Rename the status indicators: active -> ongoing, inactive -> future
foreach km in 10 25 50 75 100 200 {
	rename active`km' ongoing`km'
	rename inactive`km' future`km'
}

* ... and suspended -> completed
foreach km in 10 25 50 75 100 200 {
	rename suspended`km' completed`km'
}


*additional variables and interactions
*dummy for not belonging to the presidents ethnic group (missing when presethnic is missing)
generate notpresethnic = (presethnic==0) if presethnic!=.
foreach km in 50 25 {
	generate preseth_ongoing`km' = presethnic*ongoing`km'
	generate preseth_future`km' = presethnic*future`km'
}


*estimation sample focusing only on countries that have both observations connected to ongoing and observations connected to future
*exclude countries where less than 1 percent of sample live near future project
generate restrictedestsample = 0
replace restrictedestsample = 1 if ///
	(country=="Benin" | country=="Botswana" | country=="Cape Verde" | country=="Kenya" ///
	| country=="Liberia" | country=="Madagascar" | country=="Malawi" | country=="Mali" ///
	| country=="Namibia" | country=="Nigeria" | country=="Senegal") ///
	& ethnicidentity!=. & presethnic!=. & ongoing25!=. & future25!=. & age<=100

*alternative estimation sample allowing in countries with no obs connected to future25 (i.e. adding burundi cameroon Cote D'Ivoire  Guinea  Mauritius Niger Sierra Leone Zambia to the benchmark sample)
generate ethnicestsample2 = 0
replace ethnicestsample2 = 1 if ///
	(country=="Benin" | country=="Botswana" | country=="Cape Verde" | country=="Ghana" ///
	| country=="Kenya" | country=="Liberia" | country=="Madagascar" | country=="Malawi" ///
	| country=="Mali" | country=="Mozambique" | country=="Namibia" | country=="Nigeria" ///
	| country=="Senegal" | country=="South Africa" | country=="Tanzania" | country=="Togo" ///
	| country=="Uganda" | country=="Zimbabwe" | country=="Burundi" | country=="Cameroon" ///
	| country=="Cote D'Ivoire" | country=="Guinea" | country=="Mauritius" | country=="Niger" ///
	| country=="Sierra Leone" | country=="Zambia") ///
	& ethnicidentity!=. & presethnic!=. & ongoing25!=. & future25!=.


	
*variables relevant for timing analysis

* fyear = earliest known start year among the linked projects within 25 km
* (0 if there is none). The loop runs over every linked project slot
* (1 .. max of numberwithin200) rather than a hard-coded 45, so it does not
* fail when fewer slots exist and does not skip projects when there are more.
quietly summarize numberwithin200
local maxnear = r(max)
gen fyear = .
forvalues XX = 1/`maxnear' {
	* min() ignores missing arguments, so unknown start years are skipped;
	* a missing distance fails the <=25 test and is skipped as well
	replace fyear = min(fyear, _`XX'startyear) if _`XX'projectdist<=25
}
replace fyear = 0 if fyear==.

* Interview year minus first start year (set to 0 when there is no project within 25 km)
gen inty_starty=int_year-fyear
tab inty_starty
replace inty_starty=0 if fyear==0
tab inty_starty if ongoing25==1
tab inty_starty if future25==1

gen yearsbetween= abs(inty_starty)
tab yearsbetween if ongoing25==1
tab yearsbetween if future25==1
tab yearsbetween

* Years since the first project started (0 if it has not started yet)
gen project_duration 		= inty_starty if inty_starty>0 & inty_starty!=.
replace project_duration 	= 0 if inty_starty<=0 & inty_starty!=.
tab project_duration if ongoing25==0
tab project_duration if ongoing25==1

* Years until the first project starts (0 if it has already started)
gen timeuntilstart 			= inty_starty if inty_starty<0 & inty_starty!=.
replace timeuntilstart 		= 0 if inty_starty>=0 & inty_starty!=.
replace timeuntilstart = abs(timeuntilstart)
tab timeuntilstart if future25==0
tab timeuntilstart if future25==1

gen ongoing_yearsbetween	=ongoing25*yearsbetween
gen future_yearsbetween		=future25*yearsbetween
gen sameyear =cond((int_year==fyear),	1,0)
gen nearstart=cond(yearsbetween==1 | sameyear==1,	1,0)

*dummies for different no of years to project start
gen timeuntilstart0=cond(sameyear==1,	1,0)
forvalues k = 1/4 {
	gen timeuntilstart`k'=cond(timeuntilstart==`k',	1,0)
}
* a missing value compares as larger than 5, so it is excluded explicitly
gen timeuntilstart5ormore=cond(timeuntilstart>=5 & !missing(timeuntilstart),	1,0)

*dummies for different no of years since project start
gen projectdur0=cond(sameyear==1 & ongoing25==1,	1,0)
forvalues k = 1/4 {
	gen projectdur`k'=cond(project_duration==`k',	1,0)
}
* same missing-value guard as above
gen projectdur5ormore=cond(project_duration>=5 & !missing(project_duration),	1,0)

gen noproject25=cond(numberwithin25==0,	1,0)

*dummies for future and ongoing project starting within 5, 4, 3, 2 years
foreach k in 5 4 3 2 {
	gen future25within`k'=cond((future25==1 & yearsbetween<=`k'),	1,0)
	gen ongoing25within`k'=cond((ongoing25==1 & yearsbetween<=`k'),	1,0)
	tab future25within`k'		if restrictedestsample==1
	tab ongoing25within`k'		if restrictedestsample==1
}
	
*linear time trend variable: interview years 2002-2015 are mapped to 1-14,
*any other value of int_year is left as it is
generate timetrend = int_year
recode timetrend (2002=1) (2003=2) (2004=3) (2005=4) (2006=5) (2007=6) (2008=7) ///
	(2009=8) (2010=9) (2011=10) (2012=11) (2013=12) (2014=13) (2015=14)

*trend interacted with the 25 km status indicators
generate timetrendongoing = timetrend*ongoing25
generate timetrendfuture = timetrend*future25

/*******************presidential turnover. ignore presidential turnover within the same party
	
tab country if restrictedestsample==1
tab int_year if restrictedestsample==1			//2005-2015
	
*benin: president change 2006. but same party, so keep all
*keep if...... (country=="Benin")
tab int_year if country=="Benin" & restrictedestsample==1	
tab int_year if country=="Benin" & restrictedestsample==1	& ongoing25==1		//2008, 2011, 2014
tab int_year if country=="Benin" & restrictedestsample==1	& future25==1		//mostly 2005, only 7 in 2008

*Botswana: president change april 2008. but same party so keep all 
*keep if...... (country=="Botswana")
tab int_year if country=="Botswana" & restrictedestsample==1	
tab int_year if country=="Botswana" & restrictedestsample==1	& ongoing25==1		//2008, 2012, 2014
tab int_year if country=="Botswana" & restrictedestsample==1	& future25==1		//2005 and 2008
tab dateintr if country=="Botswana" & restrictedestsample==1						//2008 interviews took place in september acc to codebook, no datevar 
br dateintr dateintr2 dateintr3 if country=="Botswana" & restrictedestsample==1 & int_year==2008			

*Cape Verde: president and party change sept 2011. TURNOVER... we lose all ongoing if we exclude those in 2011, so exclude Cape Verde
*keep if...... 
tab int_year if country=="Cape Verde" & restrictedestsample==1	
tab int_year if country=="Cape Verde" & restrictedestsample==1	& ongoing25==1		//2011
tab int_year if country=="Cape Verde" & restrictedestsample==1	& future25==1		//2005 and 2008
tab dateintr if country=="Cape Verde" & restrictedestsample==1						//the 2011 interviews in dec, so after election

*Kenya: president change 2013 (same ethnic group though). TURNOVER.. exclude interviews from 2013 onwards (i.e. the 2014 round)
*keep if...... (country=="Kenya" & int_year<2013)
tab int_year if country=="Kenya" & restrictedestsample==1	
tab int_year if country=="Kenya" & restrictedestsample==1	& ongoing25==1		//2005, 2008, 2011, 2014
tab int_year if country=="Kenya" & restrictedestsample==1	& future25==1		//mostly 2005 few 2008

*Liberia: president change 2006. TURNOVER, exclude those before 2006
*keep if...... (country=="Liberia" & int_year>2006)
tab int_year if country=="Liberia" & restrictedestsample==1	
tab int_year if country=="Liberia" & restrictedestsample==1	& ongoing25==1		//2005, 2008, 2012, 2015
tab int_year if country=="Liberia" & restrictedestsample==1	& future25==1		//2008 (few)

*Madagascar: president change 2009, TURNOVER, exclude those after 2008
*keep if...... (country=="Madagascar" & int_year<2009)
tab int_year if country=="Madagascar" & restrictedestsample==1	
tab int_year if country=="Madagascar" & restrictedestsample==1	& ongoing25==1		//2008 and 2013
tab int_year if country=="Madagascar" & restrictedestsample==1	& future25==1		//most in 2005, few in 2008 

*Malawi: president died april 2012, vice president took over, TURNOVER, exclude those after 2008
*keep if...... (country=="Malawi" & int_year<2012)
tab int_year if country=="Malawi" & restrictedestsample==1	
tab int_year if country=="Malawi" & restrictedestsample==1	& ongoing25==1		//2008, 2012, 2014
tab int_year if country=="Malawi" & restrictedestsample==1	& future25==1		//2005, 2008 
tab dateintr if country=="Malawi" & restrictedestsample==1						//the 2012 interviews in june/july, so after president died

*Mali: no change in president, can keep all
*keep if...... (country=="Mali")
tab int_year if country=="Mali" & restrictedestsample==1	
tab int_year if country=="Mali" & restrictedestsample==1	& ongoing25==1		//2005, 2008
tab int_year if country=="Mali" & restrictedestsample==1	& future25==1		//2005, 2008 	

*Namibia: no change in presidency over the period, can keep all
*keep if...... (country=="Namibia")
tab int_year if country=="Namibia" & restrictedestsample==1	
tab int_year if country=="Namibia" & restrictedestsample==1	& ongoing25==1		//2012, 2014
tab int_year if country=="Namibia" & restrictedestsample==1	& future25==1		//2006, 2008, 2012 	
	
*Nigeria: change 2007, 2010 same party though, so ok keep all
*keep if...... (country=="Nigeria")
tab int_year if country=="Nigeria" & restrictedestsample==1	
tab int_year if country=="Nigeria" & restrictedestsample==1	& ongoing25==1		//2008, 2012, 2014
tab int_year if country=="Nigeria" & restrictedestsample==1	& future25==1		//2005
	
*Senegal: change 2012... keep those before 2012
*keep if...... (country=="Senegal" & int_year<2012)
tab int_year if country=="Senegal" & restrictedestsample==1	
tab int_year if country=="Senegal" & restrictedestsample==1	& ongoing25==1		//2005, 2008, 2013, 2014
tab int_year if country=="Senegal" & restrictedestsample==1	& future25==1		//2005
*/	
				
* noturnover = 1 for country-years kept under the presidential turnover rules,
* 0 otherwise; defined only inside the restricted estimation sample
gen noturnover = ( ///
	(country=="Benin") ///
	| (country=="Botswana") ///
	| (country=="Kenya" & int_year<2013) ///
	| (country=="Liberia" & int_year>2006) ///
	| (country=="Madagascar" & int_year<2009) ///
	| (country=="Malawi" & int_year<2012) ///
	| (country=="Mali") ///
	| (country=="Namibia") ///
	| (country=="Nigeria") ///
	| (country=="Senegal" & int_year<2012) ///
	) if restrictedestsample==1


*help variables giving coefficients that equal the difference between the
*ongoing and future coefficients in the coefplots
foreach km in 25 10 50 75 {
	generate sum_ongoingfuture`km' = ongoing`km'+future`km'
}

foreach km in 25 10 50 75 {
	generate diffhelp`km' = ongoing`km'
}

*for when having time restriction on ongoing (or both)
foreach yrs in 5 4 3 2 {
	generate diffhelp25within`yrs' = ongoing25within`yrs'
}

*time restriction on future
foreach yrs in 5 4 3 2 {
	generate sum_ongoingfuture25within`yrs' = ongoing25+future25within`yrs'
}

*time restriction on ongoing
foreach yrs in 5 4 3 2 {
	generate sum_ongoingwithin`yrs'future25 = ongoing25within`yrs'+future25
}

*time restriction on both
foreach yrs in 5 4 3 2 {
	generate sum_bothwithin`yrs' = ongoing25within`yrs'+future25within`yrs'
}

*store the finished analysis file
compress
save "$aiddata/chinese all projects_afro precision code 1 2 3 data_replication.dta", replace

			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			


********************************nearstat china all flows vs afrobar (precision code 1)*****************************




* Root folders used throughout (machine-specific absolute paths)
global afrobar "C:\Users\xisaka\ShareFile\Personal Folders\Afrobarometer"
global aiddata "C:\Users\xisaka\ShareFile\Personal Folders\AidData"
global china_ethnic "C:\Users\xisaka\Dropbox\Ias\MINA PAPERS\chinese aid and ethnic sentiments\stata"
global temp "C:\Users\xisaka\ShareFile\Personal Folders\temp data"

* Session settings
set matsize 800
set more off



********** Open the collapsed Afrobarometer file (precision code 1 only)
* NOTE(review): presumably one row per unique g_lat/g_lon pair, since it is
* later merged 1:m on those two keys -- confirm against the file that builds it.

use "$afrobar/Afro_collapsed_precisioncode1_replication.dta", clear




** Add the aid projects: append the project rows, keeping only their ID and coordinates
append using  "$aiddata\all chinese projects to afrobar countries.dta", keep(project_location_id  latitude longitude)		

** Sort so that nearstat will run, putting all projects first.
** Project rows carry a latitude; missing values sort last, so rows without a
** latitude end up after the projects.
** (Sorting on project_location_id was tried first, but it is not numeric, so it
** did not put the projects first.)
*gsort project_location_id
gsort latitude


	

** For every distance band, count the projects within that band of each cluster.
** The 200 km count later decides how many nearstat passes are needed.
set more off
local bands 10 25 50 75 100 200
foreach km of local bands {
	nearstat g_lat g_lon, near(latitude longitude) ///
		ncount(numberwithin`km') dband(0 `km') ///
		distvar(_1projectdist`km') favor(speed)
	* only the count is kept; the distance variable is discarded straight away
	drop _1projectdist`km'
}

** Link every cluster to its 1st, 2nd, ... k-th nearest project, with k running
** up to the largest number of projects found within 200 km of any cluster
summarize numberwithin200
local maxnear = r(max)
forvalues k = 1/`maxnear' {
	nearstat g_lat g_lon if numberwithin200>=`k', kth(`k') favor(speed) ///
		near(latitude longitude) distvar(_`k'projectdist) ///
		nid(project_location_id _`k'closestproject)
}

set more off


** Clusters with no project within 200 km still get their single closest project
nearstat g_lat g_lon if numberwithin200==0, kth(1) favor(speed) ///
	near(latitude longitude) distvar(not200_projectdist) ///
	nid(project_location_id not200_closestproject)
	
* Temporary save if something goes wrong.
* The checkpoint is written before the two replace statements below, so the
* saved file does not yet contain the fallback fill-in.
save "$temp/temp_aid.dta", replace

* For clusters with no project within 200 km, use the fallback nearest project
* as their first (closest) project and distance
replace _1closestproject=not200_closestproject if numberwithin200==0
replace _1projectdist=not200_projectdist if numberwithin200==0

* Now drop all project rows (they are now linked to clusters rowwise):
* first the rows that carry a latitude, then the project-only columns,
* then any remaining row without cluster coordinates
drop if !missing(latitude)
drop project_location_id latitude longitude
drop if missing(g_lat)

* compress attempts to reduce the amount of memory used by the data
compress


** Attach project details (project id, start year, end year) to each of the
** k linked projects, one merge per k
set more off
summarize numberwithin200
local maxnear = r(max)
forvalues k = 1/`maxnear' {
	* the merge key must carry the name used in the project file
	rename _`k'closestproject project_location_id
	merge m:1 project_location_id using "$aiddata\all chinese projects to afrobar countries.dta", ///
		keepusing(project_id startyear endyear) keep(master match) nogenerate
	* give the merged columns their k-specific names and restore the key name
	rename project_id _`k'project_id
	rename startyear _`k'startyear
	rename endyear _`k'endyear
	rename project_location_id _`k'closestproject
}

compress



** Bring the pre-collapse data back in on the coordinates, so that all rows
** sharing the same coordinates get exactly the same linked projects.
** Rows found only in the using file are not kept.
set more off
merge 1:m g_lat g_lon using "$afrobar/Afro_tot_precisioncode1_replication.dta", ///
	keep(master match) nogenerate

save "$temp/temp_aidC.dta", replace





use "$temp/temp_aidC.dta", clear

* Convert the start and end year variables to numeric
destring *startyear *endyear, replace

** Classify every respondent, per distance band X, by the status of the linked projects:
**   active    = some project within X km started in or before the interview year
**   suspended = some project within X km ended before the interview year and
**               no project within X km is running in the interview year
**   inactive  = some project within X km starts after the interview year and the
**               respondent is neither active nor suspended (area "contaminated" otherwise)
** Stata treats a missing value as larger than any number. The inactive rule
** therefore checks explicitly that the start year is known; otherwise a project
** with an unknown start year would count as a future project.
** A missing end year still counts as "not yet ended" in the help variable (unchanged).
** NOTE(review): a missing int_year still satisfies startyear<=int_year, so such
** respondents are coded active -- confirm int_year is never missing.
set more off
foreach dist in 25 50 75 100 200 {
	gen byte active`dist' = .
	gen byte suspended`dist' = .
	gen byte inactive`dist' = .
	gen byte activecodehelp`dist' = .
	* Labels do not depend on the project index, so they are set once per band
	label variable active`dist' "Active if any project is active"
	label variable suspended`dist' "Suspended if all projects have ended"
	label variable inactive`dist' "Inactive if a project will start, conditional on no projects before"
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Help variable for suspended: project i has started and has not ended by the interview year
		replace activecodehelp`dist' = 1 if _`i'startyear<=int_year & _`i'projectdist <= `dist' & _`i'endyear>int_year
		* Resp active if any project within X km started in or before the interview year
		replace active`dist' = 1 if _`i'startyear<=int_year & _`i'projectdist <= `dist'
		* Resp suspended if any project within X km ended before the interview year
		replace suspended`dist' = 1 if _`i'endyear<int_year & _`i'endyear<.  & _`i'projectdist <= `dist'
		* But not if at least one project is still running within X km
		replace suspended`dist' = 0 if activecodehelp`dist'==1
		* Resp inactive if any project within X km has a known start year after the interview year
		replace inactive`dist' = 1 if _`i'startyear>int_year & !missing(_`i'startyear) & _`i'projectdist <= `dist'
		* But not if at least one project is either active or suspended within X km
		replace inactive`dist' = 0 if active`dist'==1 | suspended`dist'==1
	}
	* Resp not active/inactive/suspended if never replaced in the loop above
	replace active`dist'=0 if missing(active`dist')
	replace inactive`dist'=0 if missing(inactive`dist')
	replace suspended`dist'=0 if missing(suspended`dist')
	noisily di as text "Calculated distance " as result `dist'
}


compress
save "$temp/temp_aid2C.dta", replace		//large file




* Fixed effects
set more off
** Generate variables denoting the first start year and the last end year of
** the projects within X km of the respondent.
** nextyear is created here but not filled in anywhere in this section.
foreach dist in 25 50 75 100 200 {
	gen firstyear`dist'=.
	gen nextyear`dist'=.
	gen lastyear`dist'=0
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Firstyear is replaced if the i-th project lies within X km and starts
		* earlier than the value found so far. The initial missing value counts
		* as larger than any year, so the first known start year always replaces it.
		replace firstyear`dist' = _`i'startyear if _`i'startyear<firstyear`dist' & _`i'projectdist<=`dist'
		label variable firstyear`dist' "Year of first project in area"
		* Lastyear is replaced if the end year of the i-th project is after the previous value.
		* Note: will equal 9999 if there is still a project there in 2013/2014.
		* NOTE(review): a missing end year also compares as larger, in which case
		* lastyear becomes missing -- confirm ongoing projects are coded 9999 in the project file.
		replace lastyear`dist' = _`i'endyear if _`i'endyear>lastyear`dist' & _`i'projectdist<=`dist'
		label variable lastyear`dist' "Year that last project closed in area"
	}
}


** Create variable "firstactive",
** defined as the ID of the first project to start within X km of the respondent.
** This identifies the first project for both active and inactive respondents!
** If several projects within X km share that first start year, the one handled
** last in the loop (highest i) is the one that is kept.

set more off

foreach dist in 25 50 75 100 200 {
	gen firstactive`dist'_1 = ""
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Replace firstactive if the i-th closest project is within X km and
		* this project starts in the same year as firstyear
		replace firstactive`dist'_1 = _`i'closestproject if _`i'projectdist<=`dist' & ///
			firstyear`dist'==_`i'startyear 
		label variable firstactive`dist'_1 "Name of first project in area"
		}
}




** Project identifiers for use as fixed effects
* String copies of the first-project ID for the 25, 50 and 75 km bands
foreach km in 25 50 75 {
	generate projfixed`km' = firstactive`km'_1
}

* Numeric code for the nearest project overall
encode _1closestproject, generate(first_proj)

* Swap the string IDs for numeric codes
foreach km in 50 75 25 {
	rename projfixed`km' projfixed`km'_org
	encode projfixed`km'_org, generate(projfixed`km')
}

codebook projfixed50*
drop projfixed50_org projfixed25_org projfixed75_org
	

set more off
*indicator sets for interview year (yd*) and country (cd*); region dummies left switched off
tabulate int_year, generate(yd)
tabulate country, generate(cd)
*tab region, gen (rd)

*distance to the nearest project
generate distance = _1projectdist



*store the finished analysis file
compress
save "$aiddata/chinese all projects_afro restricted to precision code 1 data_replication.dta", replace

						
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
			
	



********************************nearstat china_ODA vs afrobar (precision code 1, 2, 3)




* Root folders used throughout (machine-specific absolute paths)
global afrobar "C:\Users\xisaka\ShareFile\Personal Folders\Afrobarometer"
global aiddata "C:\Users\xisaka\ShareFile\Personal Folders\AidData"
global china_ethnic "C:\Users\xisaka\Dropbox\Ias\MINA PAPERS\chinese aid and ethnic sentiments\stata"
global temp "C:\Users\xisaka\ShareFile\Personal Folders\temp data"

* Session settings
set matsize 800
set more off



********** Open the collapsed Afrobarometer file (precision codes 1, 2, 3)
* NOTE(review): presumably one row per unique g_lat/g_lon pair, since it is
* later merged 1:m on those two keys -- confirm against the file that builds it.

use "$afrobar/Afro_collapsed_precisioncode123_replication.dta", clear





** Add the aid projects (ODA-like flows only): append the project rows, keeping only their ID and coordinates
append using  "$aiddata\ODA_like chinese projects to afrobar countries.dta", keep(project_location_id  latitude longitude)		

** Sort so that nearstat will run, putting all projects first.
** Project rows carry a latitude; missing values sort last, so rows without a
** latitude end up after the projects.
** (Sorting on project_location_id was tried first, but it is not numeric, so it
** did not put the projects first.)
*gsort project_location_id
gsort latitude


	

** For every distance band, count the projects within that band of each cluster.
** The 200 km count later decides how many nearstat passes are needed.
set more off
local bands 10 25 50 75 100 200
foreach km of local bands {
	nearstat g_lat g_lon, near(latitude longitude) ///
		ncount(numberwithin`km') dband(0 `km') ///
		distvar(_1projectdist`km') favor(speed)
	* only the count is kept; the distance variable is discarded straight away
	drop _1projectdist`km'
}

** Link every cluster to its 1st, 2nd, ... k-th nearest project, with k running
** up to the largest number of projects found within 200 km of any cluster
summarize numberwithin200
local maxnear = r(max)
forvalues k = 1/`maxnear' {
	nearstat g_lat g_lon if numberwithin200>=`k', kth(`k') favor(speed) ///
		near(latitude longitude) distvar(_`k'projectdist) ///
		nid(project_location_id _`k'closestproject)
}

set more off


** Clusters with no project within 200 km still get their single closest project
nearstat g_lat g_lon if numberwithin200==0, kth(1) favor(speed) ///
	near(latitude longitude) distvar(not200_projectdist) ///
	nid(project_location_id not200_closestproject)
	
* Temporary save if something goes wrong.
* The checkpoint is written before the two replace statements below, so the
* saved file does not yet contain the fallback fill-in.
save "$temp/temp_aid.dta", replace

* For clusters with no project within 200 km, use the fallback nearest project
* as their first (closest) project and distance
replace _1closestproject=not200_closestproject if numberwithin200==0
replace _1projectdist=not200_projectdist if numberwithin200==0

* Now drop all project rows (they are now linked to clusters rowwise):
* first the rows that carry a latitude, then the project-only columns,
* then any remaining row without cluster coordinates
drop if !missing(latitude)
drop project_location_id latitude longitude
drop if missing(g_lat)

* compress attempts to reduce the amount of memory used by the data
compress



*relevant variables in WB-data: project_id start_year end_year recipients aiddata_tuff_id precision_code latitude longitude



** Attach project details (project id, start year, end year) to each of the
** k linked projects, one merge per k
set more off
summarize numberwithin200
local maxnear = r(max)
forvalues k = 1/`maxnear' {
	* the merge key must carry the name used in the project file
	rename _`k'closestproject project_location_id
	merge m:1 project_location_id using "$aiddata\ODA_like chinese projects to afrobar countries.dta", ///
		keepusing(project_id startyear endyear) keep(master match) nogenerate
	* give the merged columns their k-specific names and restore the key name
	rename project_id _`k'project_id
	rename startyear _`k'startyear
	rename endyear _`k'endyear
	rename project_location_id _`k'closestproject
}

compress






* Not wave here.
** Bring the pre-collapse data back in on the coordinates, so that all rows
** sharing the same coordinates get exactly the same linked projects.
** Rows found only in the using file are not kept.
set more off
merge 1:m g_lat g_lon using "$afrobar/Afro_tot_precisioncode123_replication.dta", ///
	keep(master match) nogenerate

save "$temp/temp_aidC.dta", replace





use "$temp/temp_aidC.dta", clear

* Convert the start and end year variables to numeric
destring *startyear *endyear, replace


** Classify every respondent, per distance band X, by the status of the linked projects:
**   active    = some project within X km started in or before the interview year
**   suspended = some project within X km ended before the interview year and
**               no project within X km is running in the interview year
**   inactive  = some project within X km starts after the interview year and the
**               respondent is neither active nor suspended (area "contaminated" otherwise)
** Stata treats a missing value as larger than any number. The inactive rule
** therefore checks explicitly that the start year is known; otherwise a project
** with an unknown start year would count as a future project.
** A missing end year still counts as "not yet ended" in the help variable (unchanged).
** NOTE(review): a missing int_year still satisfies startyear<=int_year, so such
** respondents are coded active -- confirm int_year is never missing.
set more off
foreach dist in 25 50 75 100 200 {
	gen byte active`dist' = .
	gen byte suspended`dist' = .
	gen byte inactive`dist' = .
	gen byte activecodehelp`dist' = .
	* Labels do not depend on the project index, so they are set once per band
	label variable active`dist' "Active if any project is active"
	label variable suspended`dist' "Suspended if all projects have ended"
	label variable inactive`dist' "Inactive if a project will start, conditional on no projects before"
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Help variable for suspended: project i has started and has not ended by the interview year
		replace activecodehelp`dist' = 1 if _`i'startyear<=int_year & _`i'projectdist <= `dist' & _`i'endyear>int_year
		* Resp active if any project within X km started in or before the interview year
		replace active`dist' = 1 if _`i'startyear<=int_year & _`i'projectdist <= `dist'
		* Resp suspended if any project within X km ended before the interview year
		replace suspended`dist' = 1 if _`i'endyear<int_year & _`i'endyear<.  & _`i'projectdist <= `dist'
		* But not if at least one project is still running within X km
		replace suspended`dist' = 0 if activecodehelp`dist'==1
		* Resp inactive if any project within X km has a known start year after the interview year
		replace inactive`dist' = 1 if _`i'startyear>int_year & !missing(_`i'startyear) & _`i'projectdist <= `dist'
		* But not if at least one project is either active or suspended within X km
		replace inactive`dist' = 0 if active`dist'==1 | suspended`dist'==1
	}
	* Resp not active/inactive/suspended if never replaced in the loop above
	replace active`dist'=0 if missing(active`dist')
	replace inactive`dist'=0 if missing(inactive`dist')
	replace suspended`dist'=0 if missing(suspended`dist')
	noisily di as text "Calculated distance " as result `dist'
}


compress
save "$temp/temp_aid2C.dta", replace		//large file




* Fixed effects
set more off
** Generate variables denoting the first start year and the last end year of
** the projects within X km of the respondent.
** nextyear is created here but not filled in anywhere in this section.
foreach dist in 25 50 75 100 200 {
	gen firstyear`dist'=.
	gen nextyear`dist'=.
	gen lastyear`dist'=0
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Firstyear is replaced if the i-th project lies within X km and starts
		* earlier than the value found so far. The initial missing value counts
		* as larger than any year, so the first known start year always replaces it.
		replace firstyear`dist' = _`i'startyear if _`i'startyear<firstyear`dist' & _`i'projectdist<=`dist'
		label variable firstyear`dist' "Year of first project in area"
		* Lastyear is replaced if the end year of the i-th project is after the previous value.
		* Note: will equal 9999 if there is still a project there in 2013/2014.
		* NOTE(review): a missing end year also compares as larger, in which case
		* lastyear becomes missing -- confirm ongoing projects are coded 9999 in the project file.
		replace lastyear`dist' = _`i'endyear if _`i'endyear>lastyear`dist' & _`i'projectdist<=`dist'
		label variable lastyear`dist' "Year that last project closed in area"
	}
}


** Create variable "firstactive",
** defined as the ID of the first project to start within X km of the respondent.
** This identifies the first project for both active and inactive respondents!
** If several projects within X km share that first start year, the one handled
** last in the loop (highest i) is the one that is kept.

set more off

foreach dist in 25 50 75 100 200 {
	gen firstactive`dist'_1 = ""
	sum numberwithin`dist'
	forvalues i = 1/`r(max)' {
		* Replace firstactive if the i-th closest project is within X km and
		* this project starts in the same year as firstyear
		replace firstactive`dist'_1 = _`i'closestproject if _`i'projectdist<=`dist' & ///
			firstyear`dist'==_`i'startyear 
		label variable firstactive`dist'_1 "Name of first project in area"
		}
}




** Project identifiers for use as fixed effects
* String copies of the first-project ID for the 25, 50 and 75 km bands
foreach km in 25 50 75 {
	generate projfixed`km' = firstactive`km'_1
}

* Numeric code for the nearest project overall
encode _1closestproject, generate(first_proj)

* Swap the string IDs for numeric codes
foreach km in 50 75 25 {
	rename projfixed`km' projfixed`km'_org
	encode projfixed`km'_org, generate(projfixed`km')
}

codebook projfixed50*
drop projfixed50_org projfixed25_org projfixed75_org
	


*indicator sets for interview year (yd*) and country (cd*); region dummies left switched off
tabulate int_year, generate(yd)
tabulate country, generate(cd)
*tab region, gen (rd)

*distance to the nearest project
generate distance = _1projectdist





*store the finished analysis file
compress
save "$aiddata/chinese ODA_afro precision code 1 2 3 data_replication.dta", replace
		
			
		
		
		
		
		
		
		
		
		
		
		
		
		
		
		
********************











